#!/usr/bin/python3
# -*- encoding: utf-8 -*-
'''
@File        :pandas学习.py
@Time        :2020/09/06 23:41:20
@Author      :He
@Software    :vsCode
'''

import pandas as pd
import os

# Switch the working directory to the folder containing this script so that
# relative file names used further down resolve next to the script itself.
_here = os.path.dirname(__file__)
os.chdir(os.path.abspath(_here))

# 1. 创建
# 不设置index列，默认从0开始的序号
# dic = {
#     'name': ['张三', '李四', '王二麻子', '小淘气'],
#     'age': [37, 30, 50, 16],
#     'gender': ['男', '男', '男', '女']
# }
# df = pd.DataFrame(dic)
# print(df.head())    # 前5行

# titleList = ['name', 'age', 'gender']
# dataList = [
#     ['张三', 37, '男'],
#     ['李四', 30, '男'],
#     ['王二麻子', 50, '男'],
#     ['小淘气', 16, '女'],
# ]
# df = pd.DataFrame(dataList, columns=titleList)
# print(df.head())    # 前5行


# # 设置指定的序号
# dic = {
#     '备注': ['不及格', '良好', '最佳', '优秀'],
#     '工资': [5000, 7000, 9000, 8500],
#     '绩效分': [60, 84, 98, 91]
# }
# index_list = ['张三', '李四', '王二麻子', '小淘气']
# df = pd.DataFrame(dic, index_list)
# print(df.tail())    # 最后5行

# Demo table built from row-oriented data: every inner list is one record,
# the column labels come from titleList and the row labels from indexList.
titleList = ['name', 'age', 'gender']
indexList = ['001', '002', '003', '004']
dataList = [
    ['张三', 37, '男'],
    ['李四', 30, '男'],
    ['王二麻子', 50, '男'],
    ['小淘气', 16, '女'],
]
df = pd.DataFrame(data=dataList, columns=titleList, index=indexList)

# Show the first 5 rows (5 is also head()'s default).
print(df.head(5))


# # 数据存储
# dic = {
#     '备注': ['不及格', '良好', '最佳', '优秀'],
#     '工资': [5000, 7000, 9000, 8500],
#     '绩效分': [60, 84, 98, 91]
# }
# index_list = ['张三', '李四', '王二麻子', '小淘气']
# df = pd.DataFrame(dic, index_list)
# df.to_csv('dataInfo.csv')


# # Reading data (the two reads are alternatives: the second assignment overwrites the first)
# df = pd.read_excel('dataInfo.xlsx')
# df = pd.read_csv('dataInfo.csv', engine='python')


# # 查看数据格式
# dic = {
#     '备注': ['不及格', '良好', '最佳', '优秀'],
#     '工资': [5000, 7000, 9000, 8500],
#     '绩效分': [60, 84, 98, 91]
# }
# index_list = ['张三', '李四', '王二麻子', '小淘气']
# df = pd.DataFrame(dic, index_list)
# print(df.info())


# # 统计信息概览
# # 快速计算数值型数据的关键统计指标，像平均数、中位数、标准差等等。
# dic = {
#     '备注': ['不及格', '良好', '最佳', '优秀'],
#     '工资': [5000, 7000, 9000, 8500],
#     '绩效分': [60, 84, 98, 91]
# }
# index_list = ['张三', '李四', '王二麻子', '小淘气']
# df = pd.DataFrame(dic, index_list)
# print(df.describe())

# # 新增列
# dic = {
#     '备注': ['不及格', '良好', '最佳', '优秀'],
#     '工资': [5000, 7000, 9000, 8500],
#     '绩效分': [60, 84, 98, 91]
# }
# index_list = ['张三', '李四', '王二麻子', '小淘气']
# df = pd.DataFrame(dic, index_list)
# df['新增列'] = range(1, len(df) + 1)
# print(df)

# # 删除列
# df.drop('新增列', axis=1, inplace=True)
# print(df)

# # 选择某一列
# print(df['备注'])

# # 选择多列
# print(df[['备注', '工资']])

# df['新增列'] = "20%"
# print(df)
# df['新增列'] = df['新增列'].str.replace('%', '').astype(float)
# print(df.info())

# df['日期'] = '2020-09-07'
# # print(df)
# df['日期'] = pd.to_datetime(df['日期'])
# # print(df)
# # print(df.info())


# print(pd.to_datetime('2021-01-01') - df['日期'])
